# fix NA cases with nearest neighbor ----
# Groups with comune == NA are assigned the PRO_COM code of the nearest
# comune polygon in `shape_trans`.
case_NA <- groups_comune %>% filter(is.na(comune))

# drop points outside of italy
# Northernmost point: Westliches Zwillingsköpfl, Predoi, Alto Adige at 47°5′N 12°11′E
# Southernmost point on the mainland: Capo Spartivento, Calabria at 37°56′N 16°3′E;
#   on Lampedusa, Sicily: Punta Pesce Spada, at 35°29′N 12°36′E
# Westernmost point: Rocca Bernauda, Bardonecchia, Piedmont at 45°6′N 6°37′E
# Easternmost point: Capo d'Otranto, Otranto, Apulia at 40°6′N 18°31′E
# NOTE(review): the box below is tighter than the extremes listed above
# (47°5′N > 47, 35°29′N < 37.5, 18°31′E > 18.3), so points in southern
# Sicily or at the very edges of Italy are dropped as well. Left unchanged
# because widening it changes the sample -- confirm this is intended.
case_NA <- case_NA %>%
  filter(lat >= 37.5 & lat <= 47 & lon >= 6.3 & lon <= 18.3) %>%
  # remaining Swiss groups are identified by name
  filter(!grepl("Svizzera", group_name)) %>%
  filter(!grepl("Valais", group_name))
NA_points <- case_NA %>% dplyr::select(lon, lat)

# create a points collection (lon/lat, EPSG:4326)
# seq_len() keeps this working when no NA case is left (1:0 would iterate).
NA_points_sf <- sf::st_sfc(
  lapply(
    seq_len(nrow(NA_points)),
    function(i) sf::st_point(as.numeric(NA_points[i, ]))
  ),
  crs = 4326
)
class(NA_points_sf)
str(NA_points_sf)
sf::st_crs(NA_points_sf)

# convert to planar
# The points are projected into whatever CRS the comune shapes already use,
# so both layers always agree (presumably EPSG:2163, the code that was
# hard-coded here before -- shape_trans is created outside this section).
# NOTE(review): EPSG:2163 is a US-centred equal-area projection; consider a
# European planar CRS for both layers if distances over Italy matter.
NA_shape_trans <- shape_trans
NA_points_trans <- sf::st_transform(NA_points_sf, sf::st_crs(NA_shape_trans))
stopifnot(sf::st_crs(NA_shape_trans) == sf::st_crs(NA_points_trans))

# find nearest neighbor and extract the comune code
# st_nearest_feature() returns, for every point, the row index of the closest
# polygon; this replaces one full st_distance() call per point.
nearest_idx <- sf::st_nearest_feature(NA_points_trans, NA_shape_trans)
closest_comune_NA <- NA_shape_trans[nearest_idx, "PRO_COM"]

# overwrite the (all-NA) comune column with the matched code
case_NA <- case_NA %>%
  dplyr::mutate(comune = closest_comune_NA$PRO_COM) %>%
  dplyr::select(group_name, group_lon, group_lat, lon, lat, comune)
# combine files ----
# clean original files, no sparse cases, no missings (groups abroad)
groups_match <- groups_comune %>% filter(!is.na(comune))
groups_NA <- case_NA
groups_comune <- rbind(groups_match, groups_NA)
# keep the group identifiers and the comune code; selected by name so the
# result does not depend on column order
groups_comune <- groups_comune[, c("group_name", "group_lon", "group_lat", "comune")]

# add comune information
# first five attribute columns of the shapefile, without the geometry
name <- sf::st_drop_geometry(shape[, 1:5])
groups_comune_full <- merge(
  groups_comune, name,
  by.x = "comune", by.y = "PRO_COM", all.x = TRUE
)

# merge with events-level file
groups_comune_merge <- groups_comune_full %>%
  dplyr::rename(
    comune_id = comune,
    region_id = COD_REG,
    terunit_id = COD_CM,
    province_id = COD_PRO,
    comune_name = COMUNE
  ) %>%
  dplyr::select(region_id, terunit_id, province_id, comune_id, comune_name, group_name)
# inner join: events of groups without a comune are dropped
# NOTE(review): if group_name is not unique in groups_comune_merge, events
# are duplicated by this merge -- confirm uniqueness upstream.
events_groups_comune <- merge(meetup, groups_comune_merge, by = c("group_name"))

# all non-italian groups are dropped, -32 groups, ~ -3000 events
setdiff(meetup$group_name, events_groups_comune$group_name)
# number of dropped groups (length(meetup) would count columns, not groups)
length(setdiff(meetup$group_name, events_groups_comune$group_name))
# number of dropped events
nrow(meetup) - nrow(events_groups_comune)
head(events_groups_comune)

datpath <- "~/Dropbox/projects/m5S/data/"
write.csv(events_groups_comune, paste0(datpath, "events_groups_comune.csv"))
# M5S - Create comune-level dataset on events
# created: tk, Mon May 13 16:02:58 2019 ---
# update: nov 18, 2019. set up code according to master file.

# Preparation ----
# start from an empty workspace (everything needed is re-read from disk)
rm(list = ls())
# dev.off() raises an error when no graphics device is open, which would
# stop a sourced run right here; only close a device if there is one.
if (!is.null(dev.list())) {
  dev.off()
}
# form feed: clears the console in RStudio
cat("\014")

# globals
options(scipen = 999)

# packages
# requireNamespace() only checks availability; pacman is always called
# through pacman:: below, so it does not need to be attached.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(
  tidyverse, broom, hrbrthemes, plm, estimatr, sandwich, lmtest, AER, lfe,
  huxtable, margins, readstata13, texreg, reshape2, readxl
)

# often used code
# filter(row_number(value) == 1)
# which(colnames(df)==varnameinquotes)

# define paths
m5spath <- "~/Dropbox/projects/m5S/meetup/"
panelpath <- "~/Dropbox/projects/m5S/data/ITANES_PRE-POST_Referendum_2016/"
datpath <- "~/Dropbox/projects/m5S/data/"

# file names
popfile <- "DCIS_POPRES1_13052019235616360"

# load data
m5s <- read.csv(paste0(datpath, "events_groups_comune.csv"), stringsAsFactors = FALSE)
head(m5s)

# flag events by keywords in the event description (case-insensitive), 1/0
m5s$mention_ref <- ifelse(grepl("referendum", m5s$event_description, ignore.case = TRUE), 1, 0)
m5s$mention_renzi <- ifelse(grepl("renzi", m5s$event_description, ignore.case = TRUE), 1, 0)
m5s$mention_cons <- ifelse(grepl("costituzionale", m5s$event_description, ignore.case = TRUE), 1, 0)
# referendum event: mentions the referendum or "costituzionale"
m5s$refevent <- ifelse(m5s$mention_ref == 1 | m5s$mention_cons == 1, 1, 0)
m5s$renzievent <- ifelse(m5s$mention_renzi == 1, 1, 0)
# year = first four characters of local_date
m5s$year <- as.numeric(substr(m5s$local_date, 1, 4))

# yearly share of events flagged as referendum events
barplot(
  prop.table(table(m5s$year, m5s$refevent), 1)[, 2],
  main = "Share (Event|Mention Referendum)"
)

# event-level working file: one row per event, `one` is a counting helper
nr_events <- m5s %>%
  dplyr::select(
    local_date, yes_rsvp_count, group_id, region_id, terunit_id,
    province_id, comune_id, comune_name, refevent, renzievent
  )
nr_events$one <- 1
nr_events$date <- as.Date(nr_events$local_date)
head(nr_events)

# events without a date
# (m5s has no `date` column and a bare `local_date` is not an object, so the
# checks on m5s go through m5s$local_date)
table(is.na(nr_events$date))
nr_events %>% filter(is.na(date))
m5s %>% filter(is.na(local_date))
table(m5s$status[is.na(m5s$local_date)])

# look at one group in detail
m5s %>% filter(group_name == "(MonteCompatri) Beppe Grillo Meetup Group")
# Aggregate events to comune level ----
# All counts are computed with a grouped mutate(); afterwards only the first
# row per comune is kept. The event-level columns that remain (local_date,
# yes_rsvp_count, group_id, refevent, ...) therefore hold the values of that
# first row only. Dates are whole days, so "date > d" starts on the day
# after d.
nr_com <- nr_events %>%
  # 365 events without date, status=proposed. do not include, unclear if
  # happened or not.
  filter(!is.na(date)) %>%
  # aggregate events on comune level
  group_by(comune_id) %>%
  mutate(
    # find first event
    first_event = min(date),
    # total number of events per comune
    n_total = sum(one),
    # nr of events by year(group)
    n_startto09 = sum(one[date <= as.Date("2008-12-31")]),
    n_09to11 = sum(one[date >= as.Date("2009-01-01") & date <= as.Date("2011-12-31")]),
    n_12to15 = sum(one[date >= as.Date("2012-01-01") & date <= as.Date("2015-12-31")]),
    n_startto16 = sum(one[date <= as.Date("2015-12-31")]),
    n_16 = sum(one[date >= as.Date("2016-01-01") & date <= as.Date("2016-12-31")]),
    n_16toend = sum(one[date > as.Date("2015-12-31")]),
    # nr of events for specific time periods related to referendum 2016
    # n_pre16 / n_treat are no longer needed. use startto16 or treat_157d
    # instead.
    # time spans of equal length
    # full itanes panel length: Jul 1 - Dec 4, 2016 = 157 days
    n_treat_157d = sum(one[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    # Dec 27, 2015 - May 31, 2016 = 157 days (the former lower bound,
    # date > 2015-12-27, covered only 156 days)
    n_pre_157d = sum(one[date > as.Date("2015-12-26") & date < as.Date("2016-06-01")]),
    # x months treat
    n_treat_24m = sum(one[date > as.Date("2014-12-04") & date <= as.Date("2016-12-04")]),
    n_treat_12m = sum(one[date > as.Date("2015-12-04") & date <= as.Date("2016-12-04")]),
    n_treat_6m = sum(one[date > as.Date("2016-06-04") & date <= as.Date("2016-12-04")]),
    n_treat_120d = sum(one[date > as.Date("2016-08-06") & date <= as.Date("2016-12-04")]),
    n_treat_90d = sum(one[date > as.Date("2016-09-05") & date <= as.Date("2016-12-04")]),
    n_treat_60d = sum(one[date > as.Date("2016-10-05") & date <= as.Date("2016-12-04")]),
    n_treat_30d = sum(one[date > as.Date("2016-11-04") & date <= as.Date("2016-12-04")]),
    # campaign duration, starting jan 20 (renzi announces resignation in case of NO)
    n_treat_campaign = sum(one[date > as.Date("2016-01-20") & date <= as.Date("2016-12-04")]),
    # all events up to and including the referendum day
    n_treat_referendum = sum(one[date <= as.Date("2016-12-04")]),
    # x months pre
    n_pre_120d = sum(one[date > as.Date("2016-02-01") & date < as.Date("2016-06-01")]),
    n_pre_90d = sum(one[date > as.Date("2016-03-02") & date < as.Date("2016-06-01")]),
    n_pre_60d = sum(one[date > as.Date("2016-04-01") & date < as.Date("2016-06-01")]),
    n_pre_30d = sum(one[date > as.Date("2016-05-01") & date < as.Date("2016-06-01")]),
    # pre-campaign
    n_pre_campaign = sum(one[date < as.Date("2016-01-20")]),
    # events with reference to renzi or referendum
    n_total_renzi = sum(renzievent),
    n_total_ref = sum(refevent),
    # nr of events in treatment period with reference to either referendum
    # or renzi; same window as n_treat_157d (the former bound,
    # date > 2016-07-01, left out events on Jul 1)
    n_treat_ref = sum(refevent[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    n_treat_renzi = sum(renzievent[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    # post referendum
    n_posttreat = sum(one[date > as.Date("2016-12-04")]),
    # nr of events weighted by rsvp
    # (wn_pre13 / wn_pre16 / wn_treat are no longer used)
    wn_total = sum(one * yes_rsvp_count),
    # time spans of equal length
    # full itanes panel length, same windows as n_treat_157d / n_pre_157d
    wn_treat_157d = sum((one * yes_rsvp_count)[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    wn_pre_157d = sum((one * yes_rsvp_count)[date > as.Date("2015-12-26") & date < as.Date("2016-06-01")]),
    # x months treat
    wn_treat_120d = sum((one * yes_rsvp_count)[date > as.Date("2016-08-06") & date <= as.Date("2016-12-04")]),
    wn_treat_90d = sum((one * yes_rsvp_count)[date > as.Date("2016-09-05") & date <= as.Date("2016-12-04")]),
    wn_treat_60d = sum((one * yes_rsvp_count)[date > as.Date("2016-10-05") & date <= as.Date("2016-12-04")]),
    wn_treat_30d = sum((one * yes_rsvp_count)[date > as.Date("2016-11-04") & date <= as.Date("2016-12-04")]),
    # campaign duration, starting jan 20 (renzi announces resignation in case of NO)
    wn_treat_campaign = sum((one * yes_rsvp_count)[date > as.Date("2016-01-20") & date <= as.Date("2016-12-04")]),
    # x months pre
    wn_pre_120d = sum((one * yes_rsvp_count)[date > as.Date("2016-02-01") & date < as.Date("2016-06-01")]),
    wn_pre_90d = sum((one * yes_rsvp_count)[date > as.Date("2016-03-02") & date < as.Date("2016-06-01")]),
    wn_pre_60d = sum((one * yes_rsvp_count)[date > as.Date("2016-04-01") & date < as.Date("2016-06-01")]),
    wn_pre_30d = sum((one * yes_rsvp_count)[date > as.Date("2016-05-01") & date < as.Date("2016-06-01")]),
    # weighted referendum / renzi events, same window as n_treat_157d
    wn_treat_ref = sum((refevent * yes_rsvp_count)[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    wn_treat_renzi = sum((renzievent * yes_rsvp_count)[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    wn_posttreat = sum((one * yes_rsvp_count)[date > as.Date("2016-12-04")])
  ) %>%
  # transform to comune-level data: keep the first row of every comune
  filter(row_number(one) == 1) %>%
  # drop the grouping so later steps work on a plain table
  ungroup()
# Do all comuni survive the aggregation? ----
unique(m5s$comune_id)
length(unique(m5s$comune_id))
length(unique(m5s$comune_id)) == length(unique(nr_com$comune_id))

# comuni that are in the event file but not in the comune-level file
missing_comune_id <- setdiff(m5s$comune_id, nr_com$comune_id)
missing_comune_id
names(m5s)

# list the events of every lost comune
# - print() is required: values are not auto-printed inside a for loop
# - seq_along() instead of a fixed 1:6 / 1:length(): covers however many
#   comuni are missing and does nothing when none is
for (i in seq_along(missing_comune_id)) {
  check <- m5s %>%
    filter(comune_id == missing_comune_id[i]) %>%
    dplyr::select(group_name, event_id, event_name, local_date, status)
  print(check)
}
# yes
# 6 comuni and a total of 7 events lost because of missing events_date

# events in the treatment window vs. the pre window, one point per comune;
# the line marks equal counts
nr_com %>%
  filter(n_treat_157d < 200) %>%
  ggplot(aes(x = n_treat_157d, y = n_pre_157d)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  theme_bw()
# History variables by comune ----
# early = events up to end of 2008, mid = 2009-2011, late = 2012-2015
nr_com$hist_early_tot <- nr_com$n_startto09
nr_com$hist_mid_tot <- nr_com$n_09to11
nr_com$hist_late_tot <- nr_com$n_12to15

# shares of all pre-2016 events; comuni without any pre-2016 event get 0
# instead of NaN (0/0)
nr_com$hist_early_share <- nr_com$n_startto09 / nr_com$n_startto16
nr_com$hist_early_share[nr_com$n_startto16 == 0] <- 0
nr_com$hist_mid_share <- nr_com$n_09to11 / nr_com$n_startto16
nr_com$hist_mid_share[nr_com$n_startto16 == 0] <- 0
nr_com$hist_late_share <- nr_com$n_12to15 / nr_com$n_startto16
nr_com$hist_late_share[nr_com$n_startto16 == 0] <- 0

# dummies: more than half of the pre-2016 events fall into the period
nr_com$hist100 <- ifelse(nr_com$hist_early_share > 0.5, 1, 0)
nr_com$hist010 <- ifelse(nr_com$hist_mid_share > 0.5, 1, 0)
nr_com$hist001 <- ifelse(nr_com$hist_late_share > 0.5, 1, 0)

# shares of all events (any year)
nr_com$hist_before09_tot <- nr_com$n_startto09
nr_com$hist_before09_share <- nr_com$n_startto09 / nr_com$n_total
nr_com$hist_before09_share[nr_com$n_startto09 == 0] <- 0
nr_com$hist_before16_tot <- nr_com$n_startto16
nr_com$hist_before16_share <- nr_com$n_startto16 / nr_com$n_total
nr_com$hist_before16_share[nr_com$n_startto16 == 0] <- 0
nr_com$hist_before16_share[nr_com$n_16toend == 0] <- 1

# days between the first event and the referendum (2016-12-04)
# Both arguments are Dates and the unit is fixed: passing the day as a
# string makes difftime() read it in the local time zone while the Date
# column is read as UTC, which gives fractional day counts outside UTC.
nr_com$hist_days <- difftime(as.Date("2016-12-04"), nr_com$first_event, units = "days")
# Population data ----
pop <- read.csv(paste0(m5spath, popfile, ".csv"), stringsAsFactors = FALSE)
# Keep code, name and value. The code is kept under comune_id_leading0
# because it is read as text; a numeric copy is added below. The row for
# the whole country is dropped (the name column is comune_name here --
# there is no column called comune).
pop <- pop %>%
  dplyr::select(ITTER107, Territorio, Value) %>%
  dplyr::rename(comune_id_leading0 = ITTER107, comune_name = Territorio, pop = Value) %>%
  dplyr::filter(comune_name != "Italia")
head(pop)

# the two id columns have different types ...
head(pop$comune_id_leading0)
head(nr_com$comune_id)
class(nr_com$comune_id)
class(pop$comune_id_leading0)
# ... so add a numeric id to merge on
# NOTE(review): codes that are not purely numeric become NA here (with a
# warning) -- check whether the file contains such rows.
pop$comune_id <- as.numeric(as.character(pop$comune_id_leading0))
head(pop)

# setdiff(x,y) are those elements in x but not in y
setdiff(pop$comune_id, nr_com$comune_id)
setdiff(nr_com$comune_id, pop$comune_id)
# all comuni with events are in pop file.
nr_com$comune_id[1]
nr_com$comune_id[1] %in% pop$comune_id
nr_com$comune_id[721] %in% pop$comune_id

# merge: keep all comuni of both files
nr_com_pop <- merge(nr_com, pop, by = c("comune_id"), all = TRUE)
head(nr_com_pop)

# string matching creates non-merges of valid cases.
com_nonmerge <- setdiff(nr_com$comune_id, pop$comune_id)
com_nonmerge

# comune_name exists in both inputs, so the merged file has comune_name.x
# (events) and comune_name.y (population)
nr_com_pop %>% filter(comune_name.y == "Napoli")

# Panel data ----
panel <- read.dta13(paste0(panelpath, "itanes_referendum_panel.dta"))
warnings()
head(panel)

# overlap by name and by id
table(panel$comune_1 %in% nr_com_pop$comune_name.y)
# NOTE(review): the odd column name ïitter107 looks like an encoding
# artifact of the source file -- confirm and rename at import if so.
setdiff(panel$ïitter107, nr_com_pop$comune_id)
table(panel$ïitter107 == "")
panel$comune_id <- as.numeric(as.character(panel$ïitter107))
head(panel$comune_id)
table(panel$comune_id)
setdiff(panel$comune_id, nr_com_pop$comune_id)
setdiff(panel$comune_id[!is.na(panel$comune_id)], nr_com_pop$comune_id)
# M5S - Create comune-level dataset on events
# created: tk, Mon May 13 16:02:58 2019 ---
# update: nov 18, 2019. set up code according to master file.

# Preparation ----
# start from an empty workspace (everything needed is re-read from disk)
rm(list = ls())
# dev.off() raises an error when no graphics device is open, which would
# stop a sourced run right here; only close a device if there is one.
if (!is.null(dev.list())) {
  dev.off()
}
# form feed: clears the console in RStudio
cat("\014")

# globals
options(scipen = 999)

# packages
# requireNamespace() only checks availability; pacman is always called
# through pacman:: below, so it does not need to be attached.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(
  tidyverse, broom, hrbrthemes, plm, estimatr, sandwich, lmtest, AER, lfe,
  huxtable, margins, readstata13, texreg, reshape2, readxl
)

# often used code
# filter(row_number(value) == 1)
# which(colnames(df)==varnameinquotes)

# define paths
m5spath <- "~/Dropbox/projects/m5S/meetup/"
panelpath <- "~/Dropbox/projects/m5S/data/ITANES_PRE-POST_Referendum_2016/"
datpath <- "~/Dropbox/projects/m5S/data/"

# file names
popfile <- "DCIS_POPRES1_13052019235616360"

# load data
m5s <- read.csv(paste0(datpath, "events_groups_comune.csv"), stringsAsFactors = FALSE)
# Keyword flags on the event description (case-insensitive), coded 1/0.
# grepl() returns TRUE/FALSE for every row; as.numeric() turns that into 1/0.
m5s <- m5s %>%
  dplyr::mutate(
    mention_ref = as.numeric(grepl("referendum", event_description, ignore.case = TRUE)),
    mention_renzi = as.numeric(grepl("renzi", event_description, ignore.case = TRUE)),
    mention_cons = as.numeric(grepl("costituzionale", event_description, ignore.case = TRUE)),
    # referendum event: mentions the referendum or "costituzionale"
    refevent = as.numeric(mention_ref == 1 | mention_cons == 1),
    renzievent = as.numeric(mention_renzi == 1),
    # year = first four characters of local_date
    year = as.numeric(substr(local_date, 1, 4))
  )

# yearly share of events flagged as referendum events
ref_share_by_year <- prop.table(table(m5s$year, m5s$refevent), 1)
barplot(ref_share_by_year[, 2], main = "Share (Event|Mention Referendum)")
rm(ref_share_by_year)

# event-level working file; `one` is a counting helper, `date` the parsed
# local_date
nr_events <- m5s %>%
  dplyr::select(
    local_date, yes_rsvp_count, group_id, region_id, terunit_id,
    province_id, comune_id, comune_name, refevent, renzievent
  ) %>%
  dplyr::mutate(one = 1, date = as.Date(local_date))
# Aggregate events to comune level ----
# All counts are computed with a grouped mutate(); afterwards only the first
# row per comune is kept. The event-level columns that remain (local_date,
# yes_rsvp_count, group_id, refevent, ...) therefore hold the values of that
# first row only. Dates are whole days, so "date > d" starts on the day
# after d.
nr_com <- nr_events %>%
  # 365 events without date, status=proposed. do not include, unclear if
  # happened or not.
  filter(!is.na(date)) %>%
  # aggregate events on comune level
  group_by(comune_id) %>%
  mutate(
    # find first event
    first_event = min(date),
    # total number of events per comune
    n_total = sum(one),
    # nr of events by year(group)
    n_startto09 = sum(one[date <= as.Date("2008-12-31")]),
    n_09to11 = sum(one[date >= as.Date("2009-01-01") & date <= as.Date("2011-12-31")]),
    n_12to15 = sum(one[date >= as.Date("2012-01-01") & date <= as.Date("2015-12-31")]),
    n_startto16 = sum(one[date <= as.Date("2015-12-31")]),
    n_16 = sum(one[date >= as.Date("2016-01-01") & date <= as.Date("2016-12-31")]),
    n_16toend = sum(one[date > as.Date("2015-12-31")]),
    # nr of events for specific time periods related to referendum 2016
    # n_pre16 / n_treat are no longer needed. use startto16 or treat_157d
    # instead.
    # time spans of equal length
    # full itanes panel length: Jul 1 - Dec 4, 2016 = 157 days
    n_treat_157d = sum(one[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    # Dec 27, 2015 - May 31, 2016 = 157 days (the former lower bound,
    # date > 2015-12-27, covered only 156 days)
    n_pre_157d = sum(one[date > as.Date("2015-12-26") & date < as.Date("2016-06-01")]),
    # x months treat
    n_treat_24m = sum(one[date > as.Date("2014-12-04") & date <= as.Date("2016-12-04")]),
    n_treat_12m = sum(one[date > as.Date("2015-12-04") & date <= as.Date("2016-12-04")]),
    n_treat_6m = sum(one[date > as.Date("2016-06-04") & date <= as.Date("2016-12-04")]),
    n_treat_120d = sum(one[date > as.Date("2016-08-06") & date <= as.Date("2016-12-04")]),
    n_treat_90d = sum(one[date > as.Date("2016-09-05") & date <= as.Date("2016-12-04")]),
    n_treat_60d = sum(one[date > as.Date("2016-10-05") & date <= as.Date("2016-12-04")]),
    n_treat_30d = sum(one[date > as.Date("2016-11-04") & date <= as.Date("2016-12-04")]),
    # campaign duration, starting jan 20 (renzi announces resignation in case of NO)
    n_treat_campaign = sum(one[date > as.Date("2016-01-20") & date <= as.Date("2016-12-04")]),
    # all events up to and including the referendum day
    n_treat_referendum = sum(one[date <= as.Date("2016-12-04")]),
    # x months pre
    n_pre_120d = sum(one[date > as.Date("2016-02-01") & date < as.Date("2016-06-01")]),
    n_pre_90d = sum(one[date > as.Date("2016-03-02") & date < as.Date("2016-06-01")]),
    n_pre_60d = sum(one[date > as.Date("2016-04-01") & date < as.Date("2016-06-01")]),
    n_pre_30d = sum(one[date > as.Date("2016-05-01") & date < as.Date("2016-06-01")]),
    # pre-campaign
    n_pre_campaign = sum(one[date < as.Date("2016-01-20")]),
    # events with reference to renzi or referendum
    n_total_renzi = sum(renzievent),
    n_total_ref = sum(refevent),
    # nr of events in treatment period with reference to either referendum
    # or renzi; same window as n_treat_157d (the former bound,
    # date > 2016-07-01, left out events on Jul 1)
    n_treat_ref = sum(refevent[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    n_treat_renzi = sum(renzievent[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    # post referendum
    n_posttreat = sum(one[date > as.Date("2016-12-04")]),
    # nr of events weighted by rsvp
    # (wn_pre13 / wn_pre16 / wn_treat are no longer used)
    wn_total = sum(one * yes_rsvp_count),
    # time spans of equal length
    # full itanes panel length, same windows as n_treat_157d / n_pre_157d
    wn_treat_157d = sum((one * yes_rsvp_count)[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    wn_pre_157d = sum((one * yes_rsvp_count)[date > as.Date("2015-12-26") & date < as.Date("2016-06-01")]),
    # x months treat
    wn_treat_120d = sum((one * yes_rsvp_count)[date > as.Date("2016-08-06") & date <= as.Date("2016-12-04")]),
    wn_treat_90d = sum((one * yes_rsvp_count)[date > as.Date("2016-09-05") & date <= as.Date("2016-12-04")]),
    wn_treat_60d = sum((one * yes_rsvp_count)[date > as.Date("2016-10-05") & date <= as.Date("2016-12-04")]),
    wn_treat_30d = sum((one * yes_rsvp_count)[date > as.Date("2016-11-04") & date <= as.Date("2016-12-04")]),
    # campaign duration, starting jan 20 (renzi announces resignation in case of NO)
    wn_treat_campaign = sum((one * yes_rsvp_count)[date > as.Date("2016-01-20") & date <= as.Date("2016-12-04")]),
    # x months pre
    wn_pre_120d = sum((one * yes_rsvp_count)[date > as.Date("2016-02-01") & date < as.Date("2016-06-01")]),
    wn_pre_90d = sum((one * yes_rsvp_count)[date > as.Date("2016-03-02") & date < as.Date("2016-06-01")]),
    wn_pre_60d = sum((one * yes_rsvp_count)[date > as.Date("2016-04-01") & date < as.Date("2016-06-01")]),
    wn_pre_30d = sum((one * yes_rsvp_count)[date > as.Date("2016-05-01") & date < as.Date("2016-06-01")]),
    # weighted referendum / renzi events, same window as n_treat_157d
    wn_treat_ref = sum((refevent * yes_rsvp_count)[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    wn_treat_renzi = sum((renzievent * yes_rsvp_count)[date > as.Date("2016-06-30") & date <= as.Date("2016-12-04")]),
    wn_posttreat = sum((one * yes_rsvp_count)[date > as.Date("2016-12-04")])
  ) %>%
  # transform to comune-level data: keep the first row of every comune
  filter(row_number(one) == 1) %>%
  # drop the grouping so later steps work on a plain table
  ungroup()
# 6 comuni and a total of 7 events lost because of missing events_date
missing_comune_id <- setdiff(m5s$comune_id, nr_com$comune_id)
# seq_along() instead of 1:length(): the loop is skipped when no comune is
# missing (1:0 would run twice with an NA id)
for (i in seq_along(missing_comune_id)) {
  check <- m5s %>%
    filter(comune_id == missing_comune_id[i]) %>%
    dplyr::select(group_name, event_id, event_name, local_date, status)
  print(check)
}
# yes

# relatively balanced. no clear increase in treatment period across comuni.
# (one point per comune, the line marks equal counts in both windows)
nr_com %>%
  filter(n_treat_157d < 200) %>%
  ggplot(aes(x = n_treat_157d, y = n_pre_157d)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  theme_bw()
# add history variables by comune (when were groups active?)
# phase 1+2 = pre 09
# phase 3 = 2009-2011
# phase 4: 2012-2015
nr_com$hist_early_tot <- nr_com$n_startto09
nr_com$hist_mid_tot <- nr_com$n_09to11
nr_com$hist_late_tot <- nr_com$n_12to15

# shares of all pre-2016 events; comuni without any pre-2016 event get 0
# instead of NaN (0/0)
nr_com$hist_early_share <- nr_com$n_startto09 / nr_com$n_startto16
nr_com$hist_early_share[nr_com$n_startto16 == 0] <- 0
nr_com$hist_mid_share <- nr_com$n_09to11 / nr_com$n_startto16
nr_com$hist_mid_share[nr_com$n_startto16 == 0] <- 0
nr_com$hist_late_share <- nr_com$n_12to15 / nr_com$n_startto16
nr_com$hist_late_share[nr_com$n_startto16 == 0] <- 0

# dummies: more than half of the pre-2016 events fall into the phase
nr_com$hist100 <- ifelse(nr_com$hist_early_share > 0.5, 1, 0)
nr_com$hist010 <- ifelse(nr_com$hist_mid_share > 0.5, 1, 0)
nr_com$hist001 <- ifelse(nr_com$hist_late_share > 0.5, 1, 0)

# shares of all events (any year)
nr_com$hist_before09_tot <- nr_com$n_startto09
nr_com$hist_before09_share <- nr_com$n_startto09 / nr_com$n_total
nr_com$hist_before09_share[nr_com$n_startto09 == 0] <- 0
nr_com$hist_before16_tot <- nr_com$n_startto16
nr_com$hist_before16_share <- nr_com$n_startto16 / nr_com$n_total
nr_com$hist_before16_share[nr_com$n_startto16 == 0] <- 0
nr_com$hist_before16_share[nr_com$n_16toend == 0] <- 1

# days between the first event and the referendum (2016-12-04)
# Both arguments are Dates and the unit is fixed: passing the day as a
# string makes difftime() read it in the local time zone while the Date
# column is read as UTC, which gives fractional day counts outside UTC.
nr_com$hist_days <- difftime(as.Date("2016-12-04"), nr_com$first_event, units = "days")
# add population weights
# Read the population file, keep code / name / value under new names and
# drop the row for the whole country.
pop <- read.csv(paste0(m5spath, popfile, ".csv"), stringsAsFactors = FALSE) %>%
  select(ITTER107, Territorio, Value) %>%
  rename(
    comune_id_leading0 = ITTER107,
    comune_name = Territorio,
    pop = Value
  ) %>%
  filter(comune_name != "Italia")
# numeric copy of the code, used as merge key
pop[["comune_id"]] <- as.numeric(as.character(pop[["comune_id_leading0"]]))

# comuni with events that are missing in the population file
setdiff(nr_com$comune_id, pop$comune_id)
# all comuni with events are in pop file.
nr_com$comune_id[1] %in% pop$comune_id
nr_com$comune_id[721] %in% pop$comune_id

# merge, keeping the comuni of both files
nr_com_pop <- merge(x = nr_com, y = pop, by = "comune_id", all = TRUE)

# check overlap with panel data
panel <- read.dta13(paste0(panelpath, "itanes_referendum_panel.dta"))
table(panel$comune_1 %in% nr_com_pop$comune_name.y)
panel[["comune_id"]] <- as.numeric(as.character(panel$ïitter107))
setdiff(panel$comune_id, nr_com_pop$comune_id)
setdiff(panel$comune_id[!is.na(panel$comune_id)], nr_com_pop$comune_id)
# 100% matched, only missings in panel left.

write.csv(nr_com_pop, paste0(m5spath, "events_comune.csv"))
nrow(pop)
head(pop)

# Switch to the newer population download ----
# (an earlier download, "DCIS_POPRES1_18112019184308008", was tried first and
# replaced by the file below before it was used; it is read from datpath)
popfile <- "DCIS_POPRES1_18112019185956659"
pop <- read.csv(paste0(datpath, popfile, ".csv"), stringsAsFactors = FALSE)
head(pop)
# which reference period(s) does the file contain?
table(pop$Seleziona.periodo)

# keep code / name / value under new names, drop the row for the whole country
pop <- pop %>%
  select(ITTER107, Territorio, Value) %>%
  rename(comune_id_leading0 = ITTER107, comune_name = Territorio, pop = Value) %>%
  filter(comune_name != "Italia")
head(pop)
nrow(pop)

# Load the comune shapefile to compare the number of comuni ----
# (the workspace was cleared above, so the shapes have to be read again
# before nrow(shape) can be evaluated)
mappath <- "~/Dropbox/projects/m5S/data/maps/Italy_shapefile/Limiti_2016_ED50_g/Com2016_ED50_g/"
m5spath <- "~/Dropbox/projects/m5S/meetup/"
datpath <- "~/Dropbox/projects/m5S/data/"
# aggregate comune shapes from denominazi
# shape file
shape <- sf::st_read(paste0(mappath, "Com2016_ED50_g.shp"), stringsAsFactors = FALSE)
# transform coordinates to lat/lon
shape <- sf::st_transform(shape, "+proj=longlat +ellps=WGS84 +datum=WGS84")
class(shape)
str(shape)
sf::st_crs(shape)
nrow(shape)
# numeric copy of the population code, used as merge key
pop[["comune_id"]] <- as.numeric(as.character(pop[["comune_id_leading0"]]))
head(pop)

# comuni with events that are missing in the population file
setdiff(nr_com$comune_id, pop$comune_id)
# all comuni with events are in pop file.
nr_com$comune_id[1] %in% pop$comune_id
nr_com$comune_id[721] %in% pop$comune_id

# merge, keeping the comuni of both files
nr_com_pop <- merge(x = nr_com, y = pop, by = "comune_id", all = TRUE)

# check overlap with panel data
panel <- read.dta13(paste0(panelpath, "itanes_referendum_panel.dta"))
table(panel$comune_1 %in% nr_com_pop$comune_name.y)
panel[["comune_id"]] <- as.numeric(as.character(panel$ïitter107))
setdiff(panel$comune_id, nr_com_pop$comune_id)
setdiff(panel$comune_id[!is.na(panel$comune_id)], nr_com_pop$comune_id)
# Notes from the original session: first "100% matched, only missings in
# panel left", then corrected to: all matched except 1: 22149
panel %>% filter(comune_id == 22149)
# Final run: population file -> merge -> panel check -> write ----
# Read the population file, keep code / name / value under new names and
# drop the row for the whole country.
pop <- read.csv(paste0(datpath, popfile, ".csv"), stringsAsFactors = FALSE) %>%
  select(ITTER107, Territorio, Value) %>%
  rename(
    comune_id_leading0 = ITTER107,
    comune_name = Territorio,
    pop = Value
  ) %>%
  filter(comune_name != "Italia")
nrow(pop)
# 7998, correct nr of comuni end of 2016, like in shape file.

# numeric copy of the code, used as merge key
pop[["comune_id"]] <- as.numeric(as.character(pop[["comune_id_leading0"]]))
setdiff(nr_com$comune_id, pop$comune_id)
# all comuni with events are in pop file.
nr_com$comune_id[1] %in% pop$comune_id
nr_com$comune_id[721] %in% pop$comune_id

# merge, keeping the comuni of both files
nr_com_pop <- merge(x = nr_com, y = pop, by = "comune_id", all = TRUE)

# check overlap with panel data
panel <- read.dta13(paste0(panelpath, "itanes_referendum_panel.dta"))
table(panel$comune_1 %in% nr_com_pop$comune_name.y)
panel[["comune_id"]] <- as.numeric(as.character(panel$ïitter107))
setdiff(panel$comune_id, nr_com_pop$comune_id)
# panel ids (ignoring missing ones) without a match in the merged file
nonmatch <- setdiff(panel$comune_id[!is.na(panel$comune_id)], nr_com_pop$comune_id)
# all matched except 1: 22149
panel %>% filter(comune_id == 22149)

# the comune-level file is written to both project folders
write.csv(nr_com_pop, paste0(m5spath, "events_comune.csv"))
write.csv(nr_com_pop, paste0(datpath, "events_comune.csv"))
